import csv
import re

from bs4 import BeautifulSoup

# Placeholders written to the CSV when a value cannot be extracted.
NO_INFO = '无信息'
UNKNOWN = '未知'

# Column order of the generated CSV file.
FIELDNAMES = ['名称', '导演', '主演', '年份', '国家', '类型', '图片地址']

# Separator between the director segment and the cast segment.  Accepts the
# slash-based separator the script originally split on (with any kind of
# whitespace, including NBSP and ideographic space) as well as a bare run of
# two or more whitespace characters.
_PEOPLE_SEP = re.compile(r'\s+/\s+/\s+|\s{2,}')

# Separator between the fields of the second line; tolerant of which kind of
# whitespace surrounds the slash.
_FIELD_SEP = re.compile(r'\s+/\s+')


def _value_after_colon(segment: str) -> str:
    """Return the stripped text after the first ':' or NO_INFO if none.

    Only the first colon is treated as the label delimiter, so a value that
    itself contains a colon is kept whole.
    """
    _, colon, value = segment.partition(':')
    return value.strip() if colon else NO_INFO


def parse_info(inner_html: str) -> dict:
    """Parse the inner HTML of one info paragraph into CSV columns.

    The text before the first ``<br/>`` is read as two ``label: value``
    segments (director, then cast); the text after it is read as
    slash-separated ``year / country / genre`` fields.
    NOTE(review): that layout is inferred from how the fields are indexed
    here -- confirm against the saved pages.

    Args:
        inner_html: Markup inside the paragraph, as produced by
            ``Tag.decode_contents()``.

    Returns:
        A dict with the keys '导演', '主演', '年份', '国家' and '类型'.
        Missing people fall back to NO_INFO, missing or empty facts fall
        back to UNKNOWN.
    """
    lines = inner_html.split('<br/>')

    segments = _PEOPLE_SEP.split(lines[0].strip(), maxsplit=1)
    director = _value_after_colon(segments[0])
    cast = _value_after_colon(segments[1]) if len(segments) > 1 else NO_INFO

    fields = []
    if len(lines) > 1:
        fields = [part.strip() for part in _FIELD_SEP.split(lines[1].strip())]
    if len(fields) > 3:
        # Keep country and genre anchored to the end of the line so surplus
        # leading fields cannot shift them into the wrong column.
        # NOTE(review): assumes the surplus fields are additional release
        # years -- confirm against the data.
        fields = [' / '.join(fields[:-2]), *fields[-2:]]
    padded = (fields + [UNKNOWN] * 3)[:3]
    year, country, genre = (value or UNKNOWN for value in padded)

    return {
        '导演': director,
        '主演': cast,
        '年份': year,
        '国家': country,
        '类型': genre,
    }


def parse_page(html: str) -> list:
    """Return one CSV row dict per titled ``div.item`` found in *html*.

    Every column is read from the same ``div.item`` element, so a missing
    image or info paragraph yields a placeholder for that row only instead
    of shifting the columns of all following rows.
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = []
    for item in soup.select('div.item'):
        title = item.select_one('div.hd span.title:nth-child(1)')
        if title is None:
            # The row count is defined by the titles; skip untitled items.
            continue
        info = item.select_one('div.bd p:nth-child(1)')
        image = item.select_one('img')

        row = {'名称': title.text.strip()}
        row.update(parse_info(info.decode_contents() if info is not None else ''))
        src = image.get('src') if image is not None else None
        row['图片地址'] = src or ''
        rows.append(row)
    return rows


def main() -> None:
    """Read the ten saved pages and write every movie to movie250.csv."""
    rows = []
    for page in range(1, 11):
        with open(f'top250/src{page}.html', 'r', encoding='utf-8') as f:
            rows.extend(parse_page(f.read()))

    # utf-8-sig writes a BOM at the start of the file.
    with open('movie250.csv', 'w', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES)
        writer.writeheader()
        writer.writerows(rows)

    print(f'共写入 {len(rows)} 部电影到 movie250.csv')


if __name__ == '__main__':
    main()
